* Reset settings and initialize log file
* NOTE(review): launch is a project-defined command whose definition is not in
* this file; path() presumably names the log/output location for this build
* step - TODO confirm against the launch program
launch, path("build/geo_builder")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Prepare geographic files.
*-------------------------------------------------------------------------------


* Create a list of state names/FIPS codes
*-------------------------------------------------------------------------------

* Read the pipe-delimited Census state reference file (state names,
* abbreviations, and FIPS codes); the first row supplies the variable names
import delimited "$basepath/libraries/census/state.txt", ///
    delimiter("|") varnames(1) clear

* Switch to the project's naming convention in a single group rename
rename (state stusab) (state_fips state_abbrev)

* Restrict to the 50 states + DC by discarding any FIPS code outside 1-56
drop if !inrange(state_fips, 1, 56)

* Assign Census divisions
* Each local below holds the state FIPS codes that make up the division with
* the matching number; the loop then stamps that number onto those states.
local div1 9, 23, 25, 33, 44, 50
local div2 34, 36, 42
local div3 17, 18, 26, 39, 55
local div4 19, 20, 27, 29, 31, 38, 46
local div5 10, 11, 12, 13, 24, 37, 45, 51, 54
local div6 1, 21, 28, 47
local div7 5, 22, 40, 48
local div8 4, 8, 16, 30, 32, 35, 49, 56
local div9 2, 6, 15, 41, 53

* States not listed in any division keep a missing value
gen byte division = .
forvalues d = 1/9 {
    replace division = `d' if inlist(state_fips, `div`d'')
}

* Define all nine division names at once and attach them to the variable
label define division_lbl       ///
    1 "New England"             ///
    2 "Middle Atlantic"         ///
    3 "East North Central"      ///
    4 "West North Central"      ///
    5 "South Atlantic"          ///
    6 "East South Central"      ///
    7 "West South Central"      ///
    8 "Mountain"                ///
    9 "Pacific", replace
label values division division_lbl

* Assign Census regions
* Each region is a group of divisions; a division that matches none of the
* groups (including a missing division) yields a missing region.
gen byte region = cond(inlist(division, 1, 2),    1, ///
                  cond(inlist(division, 3, 4),    2, ///
                  cond(inlist(division, 5, 6, 7), 3, ///
                  cond(inlist(division, 8, 9),    4, .))))

* Define the four region names at once and attach them to the variable
label define region_lbl ///
    1 "Northeast"       ///
    2 "Midwest"         ///
    3 "South"           ///
    4 "West", replace
label values region region_lbl

* Columns of the state-level registry, listed in their final order
local registry_vars state_fips state_name state_abbrev division region

* Variable labels, stored under the name of the variable they describe
local lbl_state_fips   "State FIPS code"
local lbl_state_name   "State name"
local lbl_state_abbrev "State abbreviation"
local lbl_division     "Census division"
local lbl_region       "Census region"

* Drop everything outside the registry and put the columns in order
keep  `registry_vars'
order `registry_vars'

* Label variables
foreach v of local registry_vars {
    label variable `v' "`lbl_`v''"
}

* Save a state-level registry, sorted by FIPS code and stored compactly
sort state_fips
compress
save "$basepath/data/derived/state_fips.dta", replace


* Prepare state-level shapefile
*-------------------------------------------------------------------------------

* Process shapefile
* The attribute table is written to a temporary file so it can be trimmed
* below; the coordinates go straight to the derived-data folder.
tempfile state_db
shp2dta using "$basepath/libraries/census/tl_2020_us_state/tl_2020_us_state.shp", ///
    database("`state_db'")                                                        ///
    coordinates("$basepath/data/derived/state_coord.dta")                         ///
    genid(_ID) replace

* Simplify the database file: load only the shapefile ID and the state code
use _ID STATEFP using "`state_db'", clear

* Convert the state code to a number, then give it the project's name
destring STATEFP, replace
rename STATEFP state_fips

* Label, arrange, and store the lookup between FIPS codes and shapefile IDs
label variable _ID        "Shapefile ID"
label variable state_fips "State FIPS code"
order state_fips _ID
sort state_fips
compress
save "$basepath/data/derived/state_db.dta", replace

* Close the log file
* NOTE(review): unlaunch is a project-defined command not defined in this file;
* presumably the counterpart of the launch call at the top of this script -
* TODO confirm
unlaunch
